{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Facets Dive and Overview Colab Example",
      "version": "0.3.2",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "M7JcESAhpKG-",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#@title Install the facets_overview pip package.\n",
        "!pip install facets-overview"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "blPpZw5R3Bb4",
        "colab": {}
      },
      "source": [
        "# Load UCI census train and test data into dataframes.\n",
        "import pandas as pd\n",
        "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n",
        "            \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n",
        "            \"Hours per week\", \"Country\", \"Target\"]\n",
        "train_data = pd.read_csv(\n",
        "    \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
        "    names=features,\n",
        "    sep=r'\\s*,\\s*',\n",
        "    engine='python',\n",
        "    na_values=\"?\")\n",
        "test_data = pd.read_csv(\n",
        "    \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n",
        "    names=features,\n",
        "    sep=r'\\s*,\\s*',\n",
        "    skiprows=[0],\n",
        "    engine='python',\n",
        "    na_values=\"?\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "XtOzRy8Z3M36",
        "colab": {}
      },
      "source": [
        "\n",
        "# Display the Dive visualization for the training data.\n",
        "from IPython.core.display import display, HTML\n",
        "\n",
        "jsonstr = train_data.to_json(orient='records')\n",
        "HTML_TEMPLATE = \"\"\"\n",
        "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
        "        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
        "        <facets-dive id=\"elem\" height=\"600\"></facets-dive>\n",
        "        <script>\n",
        "          var data = {jsonstr};\n",
        "          document.querySelector(\"#elem\").data = data;\n",
        "        </script>\"\"\"\n",
        "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n",
        "display(HTML(html))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "mjv5Kr1Mflq7",
        "colab": {}
      },
      "source": [
        "# Create the feature stats for the datasets and stringify it.\n",
        "import base64\n",
        "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
        "\n",
        "gfsg = GenericFeatureStatisticsGenerator()\n",
        "proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},\n",
        "                                  {'name': 'test', 'table': test_data}])\n",
        "protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "b7zs2p2_goJa",
        "colab": {}
      },
      "source": [
        "# Display the facets overview visualization for this data\n",
        "from IPython.core.display import display, HTML\n",
        "\n",
        "HTML_TEMPLATE = \"\"\"\n",
        "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
        "        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\" >\n",
        "        <facets-overview id=\"elem\"></facets-overview>\n",
        "        <script>\n",
        "          document.querySelector(\"#elem\").protoInput = \"{protostr}\";\n",
        "        </script>\"\"\"\n",
        "html = HTML_TEMPLATE.format(protostr=protostr)\n",
        "display(HTML(html))"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}